How fast does biological rock crust grow?

17 October, 2020

Setting general parameters:

# Reproducibility seed and general run parameters
set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"

# Colour palettes: the four darkest shades of each ColorBrewer ramp,
# ordered dark-to-light (reversing the light-to-dark slice 6:9)
Browns <- rev(RColorBrewer::brewer.pal(n = 9, "YlOrBr")[6:9])
Greens <- rev(RColorBrewer::brewer.pal(n = 9, "YlGn")[6:9])
Blues <- rev(RColorBrewer::brewer.pal(n = 9, "YlGnBu")[6:9])
# Interleave browns and greens (column-major unfold of a 2-row matrix gives
# Browns[1], Greens[1], Browns[2], Greens[2], ...) and append the blues
Gradient.colours <- c(as.vector(rbind(Browns, Greens)), Blues)

Description

This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.

Load data

# Load the OTU table (transpose so samples are rows) and order the OTU
# columns numerically by their "OTU<n>" index
OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric(gsub("OTU([0-9]+)", "\\1", colnames(OTUmat)))
OTUmat <- OTUmat[, order(sort.order)]

# NOTE(review): this read.csv() result is immediately overwritten by the
# read_csv() pipeline below — kept for compatibility, but it is a dead store.
Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)

# Re-read the metadata with readr, convert the two design variables to
# factors (across() supersedes mutate_at) and move sample codes to row names
read_csv(paste0(data_path, "Shivta_metadata.csv"),
         trim_ws = TRUE) %>%
  mutate(across(c("Rock.type", "Location"), factor)) %>%
  column_to_rownames("Sample.code") ->
  Metadata

# Strip personal run tags from the OTU-table sample names so they match the
# metadata row names, then align both tables by sorted row name
row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names(OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size (reads per sample)
Metadata$Library.size <- rowSums(OTUmat)
# Interaction factor combining sampling location and rock type
Metadata$Location.rock <- with(Metadata, Location:Rock.type)

# Load taxonomy data
# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file

# Pad incomplete lineages with "Unclassified;" so every row carries 6
# ';'-terminated rank fields. Vectorised replacement of the per-row loop.
# regmatches() yields a true zero count for rows without any ';' — the old
# length(gregexpr(...)[[1]]) count returned 1 there because gregexpr()
# signals "no match" with -1, under-padding such rows by one rank.
semicolons <- lengths(regmatches(Taxonomy$V2, gregexpr(";", Taxonomy$V2, fixed = TRUE)))
# strrep() builds the pad string directly (paste0 has no `sep` argument, so
# the original `sep = ""` was silently absorbed into `...`)
Taxonomy$V2 <- paste0(Taxonomy$V2, strrep("Unclassified;", pmax(6 - semicolons, 0)))
# split taxonomy to columns
# Split the combined taxonomy strings into columns.
# V1 holds "OTU<n>;size=<count>;" — split it and strip the "size=" prefix;
# V2 holds the ';'-separated lineage. The apply() step strips bootstrap
# support suffixes like "(100)" and coerces the result to a character matrix.
do.call("rbind", strsplit(Taxonomy$V1, ";", fixed = TRUE)) %>%
  gsub("size=([0-9]+)", "\\1", .) %>%
  data.frame(., do.call("rbind", strsplit(Taxonomy$V2, ";", fixed = TRUE)),
             stringsAsFactors = FALSE) %>%  # spell out FALSE, never `F`
  apply(., 2, function(x) gsub("\\(.*\\)", "", x)) %>%
  replace(., . == "unclassified", "Unclassified") ->
  Taxonomy
colnames(Taxonomy) <- c("OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus")
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]  # index rows by OTU id

# Read the IQ-TREE phylogeny of the filtered OTU representatives
Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))

# generate phyloseq object
# Combines counts (samples as rows), taxonomy (minus the OTU id and
# Frequency columns), sample metadata and the phylogenetic tree
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
                   tax_table(Taxonomy[, -c(1, 2)]),
                   sample_data(Metadata),
                   phy_tree(Tree_IQ)
)
# Reorder factors for plotting (Slope first, then City)
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")

Remove un- and mis-classified sequences, chloroplasts and mitochondria

# Taxa to discard: empty/eukaryotic/unassigned domains, chloroplast
# sequences (a class) and mitochondrial sequences (a family)
domains2remove <- c("", "Eukaryota", "Unclassified")
classes2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")

Ps_obj_filt <- subset_taxa(
  Ps_obj,
  !is.na(Phylum) &
    !Domain %in% domains2remove &
    !Class %in% classes2remove &
    !Family %in% families2remove
)

Inspect library size and number of OTU

# Rank samples by library size and plot reads-per-sample coloured by the
# Location x Rock.type group
Ps_obj_df <-
  as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df)) # rank used as the x axis
ggplot(data = Ps_obj_df, 
       aes(x = Index, y = Library.size, color = Location.rock)) + 
  geom_point(size = 4) + 
  scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")

# Reads per sample and reads per OTU after the taxonomy-based filtering
summary(sample_sums(Ps_obj_filt))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23972   44989   54329   53254   61352   77463
summary(taxa_sums(Ps_obj_filt))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3      70     191    1814     769   76180

Explore the prevalence of different taxa in the database

# Prevalence = number of samples in which each taxon occurs.
# Pick the margin that iterates over taxa whichever way the table is
# oriented; a scalar condition takes if/else, not the vectorised ifelse().
prevdf <- apply(X = otu_table(Ps_obj_filt),
                 MARGIN = if (taxa_are_rows(Ps_obj_filt)) 1 else 2,
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
                      TotalAbundance = taxa_sums(Ps_obj_filt),
                      tax_table(Ps_obj_filt))
# Mean and summed prevalence per phylum
prevdf %>%
  group_by(Phylum) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_phylum_summary

# Render as a styled HTML table
Prevalence_phylum_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE)

Phylum

Mean prevalence

Sum prevalence

Acidobacteria

11.7

152

Actinobacteria

16.3

3414

Aquificae

4.0

12

Armatimonadetes

7.6

76

Bacteroidetes

11.8

1027

Candidate_division_KB1

8.0

8

Candidate_division_OD1

4.0

4

Candidate_division_OP11

4.5

9

Candidate_division_TM7

7.9

158

Chloroflexi

11.0

1124

Cyanobacteria

14.9

506

Deinococcus-Thermus

15.8

174

Euryarchaeota

3.9

90

Firmicutes

13.4

214

Gemmatimonadetes

11.2

617

Nitrospirae

6.0

12

Planctomycetes

10.8

140

Proteobacteria

14.4

1817

Tenericutes

3.0

3

Verrucomicrobia

23.0

23

WCHB1-60

7.0

28

# Repeat the prevalence summary at the order level
Prevalence_Order_summary <- prevdf %>%
  group_by(Order) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence))

# Render as a styled HTML table
Prevalence_Order_summary %>%
  kable(digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE)

Order

Mean prevalence

Sum prevalence

AKIW781

9.4

198

AKYG1722

10.7

75

AT425-EubC11_terrestrial_group

10.2

378

Acidimicrobiales

14.7

368

Alteromonadales

12.0

12

Aquificales

4.0

12

Ardenticatenales

8.0

16

B103G10

7.0

7

BD2-11_terrestrial_group

5.0

5

Bacillales

13.7

164

Bacteroidales

17.0

17

Bdellovibrionales

8.0

40

Burkholderiales

18.5

204

C0119

5.0

5

Caldilineales

18.0

18

Campylobacterales

6.0

6

Caulobacterales

23.7

71

Chromatiales

20.0

20

Chthoniobacterales

23.0

23

Clostridiales

6.5

13

Corynebacteriales

8.0

40

Cytophagales

11.8

695

Deinococcales

15.1

151

Desulfovibrionales

5.0

5

E6aD10

11.0

11

EMP-G18

3.0

3

Enterobacteriales

22.0

44

Euzebyales

13.5

202

Flavobacteriales

12.7

38

Frankiales

20.9

356

Gaiellales

10.4

94

Gemmatimonadales

14.3

214

Halobacteriales

3.9

90

JG30-KF-CM45

13.0

468

Kineosporiales

20.2

121

Lactobacillales

18.5

37

Micrococcales

18.0

198

Micromonosporales

19.5

39

Myxococcales

10.2

51

Nitriliruptorales

9.7

58

Nitrosomonadales

11.5

23

Nitrospirales

6.0

12

Oceanospirillales

16.0

16

Orbales

7.0

7

Order_II

4.8

24

Order_III

8.7

26

PAUC43f_marine_benthic_group

2.0

2

Planctomycetales

9.2

74

Propionibacteriales

14.5

318

Pseudomonadales

14.7

103

Pseudonocardiales

18.0

306

Rhizobiales

16.1

386

Rhodobacterales

14.8

163

Rhodospirillales

11.8

271

Rickettsiales

11.2

146

Rubrobacterales

22.3

491

S0134_terrestrial_group

18.0

18

SBYG-4553

10.0

10

Solirubrobacterales

17.2

638

Sphaerobacterales

3.3

10

Sphingobacteriales

14.2

227

Sphingomonadales

19.6

196

Streptomycetales

24.5

49

Streptosporangiales

3.0

3

Subgroup_3

15.0

15

Subgroup_4

10.6

85

Subgroup_6

13.0

52

SubsectionI

13.0

13

SubsectionII

15.8

315

SubsectionIII

14.8

118

SubsectionIV

13.5

54

TRA3-20

14.0

14

Thermales

23.0

23

Thermophilales

15.0

30

Unclassified

9.2

642

Unknown_Order

9.8

98

Vampirovibrionales

6.0

6

WD2101_soil_group

13.0

39

Xanthomonadales

18.0

18

Based on that we will remove all phyla with a prevalence of under 8

# Collect the phylum names whose summed prevalence falls below the
# threshold of 8 chosen above, then drop them from the phyloseq object
Prevalence_phylum_summary %>% 
  filter(`Sum prevalence` < 8) %>% 
  dplyr::select(Phylum) %>% 
  map(as.character) %>% 
  unlist() ->
  filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
# Refresh per-sample totals after dropping taxa
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 732 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 732 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 732 tips and 730 internal nodes ]
## taxa are columns

Plot general prevalence features of the phyla

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
# Prevalence fraction vs. total abundance, one facet per phylum
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
  # Include a guess for parameter (candidate 5% prevalence cut-off)
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 30 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
# Prevalence fraction vs. total abundance, one facet per order
ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
  # Include a guess for parameter (candidate 5% prevalence cut-off)
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Unsupervised filtering by prevalence

We will remove all sequences which appear in less than 10% of the samples

# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold
## [1] 2.5
# Execute prevalence filter, using `prune_taxa()` function
# (Prevalence is integer-valued, so >= 2.5 keeps OTUs seen in >= 3 samples)
keepTaxa <-
  row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt  %<>%  prune_taxa(keepTaxa, .)
# Refresh per-sample totals after pruning
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 711 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 711 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 711 tips and 709 internal nodes ]
## taxa are columns

This removed 56 or 7% of the sequences.

Exploring the dataset features

First let’s look at the count data distribution after filtering:

# Library-size distribution after filtering (project helper function)
PlotLibDist(Ps_obj_filt)

# Per-sample library sizes as a styled HTML table
sample_data(Ps_obj_filt) %>% 
  as_tibble() %>% 
  dplyr::select(Sample.name, Library.size) %>% 
  as(., "data.frame") %>% 
  kable(.) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Sample.name

Library.size

City Chalk 2

49453

City Chalk 3

54157

City Chalk 4

44943

City Chalk 5

58835

City Chalk 6

77463

City Limestone 1

36122

City Limestone 2

52816

City Limestone 3

56370

City Limestone 4

53092

City Limestone 5

61350

City Limestone 6

54508

Slope Limestone 1

75580

Slope Chalk 1

51652

Slope Chalk 2

45201

Slope Chalk 3

33632

Slope Chalk 4

70890

Slope Chalk 5

71460

Slope Chalk 6

58961

Slope Chalk 7

36759

City Chalk 1

23968

Slope Limestone 2

62460

Slope Limestone 3

36635

Slope Limestone 4

57291

Slope Limestone 5

64511

Slope Limestone 6

41565

The figure and table indicate only a small deviation in the number of reads per samples.

# PERMANOVA: does library size explain community composition?
# (Morisita-Horn dissimilarity, 9999 permutations)
# NOTE(review): vegan::adonis() is deprecated in favour of adonis2() in
# vegan >= 2.6 — confirm the installed version before updating.
(mod1 <- adonis(
  otu_table(Ps_obj_filt) ~ Library.size,
  data = as(sample_data(Ps_obj_filt), "data.frame"), 
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4358 0.43579  1.3712 0.05626 0.2005
## Residuals    23    7.3095 0.31781         0.94374       
## Total        24    7.7453                 1.00000
# Count histogram (project helper)
PlotReadHist(as(otu_table(Ps_obj_filt), "matrix"))

# Mean-SD plot of log2(counts + 1) over taxa with at least one read
notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))

The difference in library sizes is low and its effect on the community composition is minimal. We’ll use the GMPR method for library size normalisation (Chen and Chen 2017)

# Copy the object; its otu_table is replaced with normalised counts below
Ps_obj_filt_GMPR <- Ps_obj_filt
# Compute one GMPR size factor per sample from the taxa-by-sample matrix
Ps_obj_filt %>%
  otu_table(.) %>%
  t() %>%
  as(., "matrix") %>%
  GMPR() ->
  GMPR_factors
## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
# Divide each sample's counts by its size factor:
# t(counts) %*% diag(1/gmpr) scales the sample columns, then the second t()
# restores the samples-as-rows orientation expected by otu_table()
Ps_obj_filt %>%
  otu_table(.) %>%
  t() %*% diag(1 / GMPR_factors$gmpr) %>%
  t() %>%
  as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
  otu_table(., taxa_are_rows = FALSE) ->
  otu_table(Ps_obj_filt_GMPR)
# Keep the original (pre-normalisation) totals for the adonis re-check
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
# Re-run the PERMANOVA on the GMPR-normalised counts
adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Library.size,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
)
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4358 0.43579  1.3712 0.05626 0.2068
## Residuals    23    7.3095 0.31781         0.94374       
## Total        24    7.7453                 1.00000
# Library-size distribution after normalisation
PlotLibDist(Ps_obj_filt_GMPR)

# Did it improve anything? Re-inspect the count distribution and mean-SD
PlotReadHist(as(otu_table(Ps_obj_filt_GMPR), "matrix"))

notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))

Alpha diversity

Calculate and plot alpha diversity metrics

We do that by simulating 1000 rarefaction events and calculating the metrics each time. Then, the result is averaged.

# Bootstrapped richness: rarefy every sample to the smallest library size
# `bootstraps` times, run estimateR() each time, then average.
rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
# One replicate matrix per estimateR() statistic (estimate + its SE)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
                   S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq_len(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  for (j in seq_along(rich.ests)) {
    rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
  }
}
# Average the replicates. Indices c(1, 2, 4) are the point estimates
# (S.obs, S.chao1, S.ACE); Chao1 and ACE carry their own se matrix at
# i + 1, while the S.obs "se" is derived from the replicates directly.
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
  S <- apply(rich.ests[[i]], 1, mean)
  if (i == 1) { 
    # NOTE(review): mean(x)/sqrt(n) is not a conventional standard error
    # (sd(x)/sqrt(n) would be); left as-is to reproduce the published
    # numbers — confirm against the paper before changing.
    se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
    } else se <- apply(rich.ests[[i + 1]], 1, mean)
  Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./Results/", Proj_name, "_richness.RDS"))
write.csv(Richness, file = paste0("./Results/", Proj_name, "_richness.csv"))
# Long format: split the .se columns from the point-estimate columns.
# NOTE: tidyr::gather() is superseded by pivot_longer(); kept because the
# later cbind() alignment relies on gather()'s column-major row ordering.
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>% 
  gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>% 
  gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))  # number of richness metrics
# diversity indices
# diversity indices
# Same bootstrap-rarefaction scheme for Shannon, inverse Simpson and
# Berger-Parker dominance (BiodiversityR::diversityresult, per site)
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
  diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
  diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
# Average replicates per sample
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
  S <- apply(diversity.inds[[i]], 1, mean)
  # NOTE(review): mean(x)/sqrt(n) is not a conventional standard error
  # (sd(x)/sqrt(n) would be) — confirm against the paper before changing
  se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
  Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./Results/", Proj_name, "_diversity.RDS"))
write.csv(Diversity, file = paste0("./Results/", Proj_name, "_diversity.csv"))

Test the differences in alpha diversity.

# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
# gather() stacks columns in order, so se.dat rows line up one-to-one with
# mean.dat rows (each .se column follows its estimate column)
Richness_Diversity[, ses] %>% 
  gather(key = "est.se") -> 
  se.dat
Richness_Diversity[, -unique(ses)] %>% 
  gather(key = "Metric", 
         value = "Estimate") -> 
  mean.dat
# Long table with error-bar bounds (estimate +/- se)
Richness_Diversity_long <-
  cbind(
    Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
    mean.dat,
    lerr = mean.dat$Estimate - se.dat$value,
    herr = mean.dat$Estimate + se.dat$value
  )
# Pretty metric labels, in fixed plotting order
Richness_Diversity_long$Metric <-
  factor(
    Richness_Diversity_long$Metric,
    levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
    labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
  )
# Attach sample metadata
# NOTE(review): relies on data.frame recycling of the metadata rows across
# the stacked metrics — confirm row order matches `Sample` above
Richness_Diversity_long %<>%
  cbind(., 
        sample_data(Ps_obj_filt))

# S Obs
# Two-way ANOVA (Location * Rock.type) on observed richness (TestAlphaV3
# is a project helper)
data2test <- Richness_Diversity_long[Richness_Diversity_long$Metric == "S obs.", ] 

(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares        65       705               9787     62082
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.4
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df F value  Pr(>F)    
## (Intercept)        594952  1  201.25 3.1e-12 ***
## Location             3860  1    1.31   0.266    
## Rock.type            7702  1    2.61   0.121    
## Location:Rock.type   9787  1    3.31   0.083 .  
## Residuals           62082 21                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##     
## 313 
## 
##  Location 
##     Slope City
##       314  311
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       307       318
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 292   340      
##    rep     7     6      
##    City  326   296      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares        65       705               9787     62082
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.4
## Estimated effects may be unbalanced
# Post-hoc test
# Estimated marginal means for each Location x Rock.type cell
marginal <- emmeans(mod_obsS,
                   ~ Location : Rock.type)
summary(marginal)

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

Slope

Chalk

292

20.6

21

249

334

City

Chalk

326

22.2

21

280

372

Slope

Limestone

340

22.2

21

294

387

City

Limestone

296

22.2

21

249

342

# Tukey-adjusted pairwise contrasts between the four group means
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            -34.6 30.2 21 -1.143  0.6680 
##  Slope Chalk - Slope Limestone       -48.8 30.2 21 -1.614  0.3930 
##  Slope Chalk - City Limestone         -4.1 30.2 21 -0.135  0.9990 
##  City Chalk - Slope Limestone        -14.3 31.4 21 -0.454  0.9680 
##  City Chalk - City Limestone          30.5 31.4 21  0.971  0.7670 
##  Slope Limestone - City Limestone     44.8 31.4 21  1.426  0.4980 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Compact letter display: groups sharing a letter are indistinguishable
(obsS_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

.group

1

Slope

Chalk

292

20.6

21

236

347

a

4

City

Limestone

296

22.2

21

235

356

a

2

City

Chalk

326

22.2

21

266

387

a

3

Slope

Limestone

340

22.2

21

280

401

a

# ANOVA table with an effect-size column.
# NOTE(review): labelled "Part Eta Sq" but computed as SS_effect/SS_total,
# which is classical eta squared (partial would be SS/(SS + SS_res)) — confirm.
(mod_obsS %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_obsS_ANOVA)

Df

Sum Sq

Mean Sq

F value

Pr(>F)

Part Eta Sq

1

64.6

64.6

0.022

0.884

0.001

1

704.8

704.8

0.238

0.630

0.010

1

9786.6

9786.6

3.310

0.083

0.135

21

62081.9

2956.3

NA

NA

0.855

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
# Interaction plot of the estimated marginal means
emmip(mod_obsS, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

# Shannon
# Same two-way ANOVA, now on the Shannon index
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares      1.07      0.18               0.16      4.41
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.458
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df F value Pr(>F)    
## (Intercept)         313.2  1 1491.16 <2e-16 ***
## Location              1.1  1    5.38   0.03 *  
## Rock.type             0.2  1    0.78   0.39    
## Location:Rock.type    0.2  1    0.74   0.40    
## Residuals             4.4 21                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##      
## 3.55 
## 
##  Location 
##     Slope  City
##      3.75  3.33
## rep 13.00 12.00
## 
##  Rock.type 
##     Chalk Limestone
##      3.47      3.64
## rep 13.00     12.00
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 3.60  3.92     
##    rep   7.00  6.00     
##    City  3.33  3.34     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares      1.07      0.18               0.16      4.41
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.458
## Estimated effects may be unbalanced
# Post-hoc test
# Estimated marginal means for each Location x Rock.type cell
marginal <- emmeans(mod_Shannon,
                   ~ Location : Rock.type)
summary(marginal)

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

Slope

Chalk

3.60

0.173

21

3.24

3.96

City

Chalk

3.33

0.187

21

2.94

3.72

Slope

Limestone

3.92

0.187

21

3.53

4.31

City

Limestone

3.34

0.187

21

2.95

3.73

# Tukey-adjusted pairwise contrasts between the four group means
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate    SE df t.ratio p.value
##  Slope Chalk - City Chalk            0.268 0.255 21  1.050  0.7220 
##  Slope Chalk - Slope Limestone      -0.321 0.255 21 -1.258  0.5980 
##  Slope Chalk - City Limestone        0.264 0.255 21  1.035  0.7310 
##  City Chalk - Slope Limestone       -0.589 0.265 21 -2.225  0.1490 
##  City Chalk - City Limestone        -0.004 0.265 21 -0.015  1.0000 
##  Slope Limestone - City Limestone    0.585 0.265 21  2.210  0.1530 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Compact letter display: groups sharing a letter are indistinguishable
(Shannon_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

.group

2

City

Chalk

3.33

0.187

21

2.82

3.84

a

4

City

Limestone

3.34

0.187

21

2.83

3.85

a

1

Slope

Chalk

3.60

0.173

21

3.13

4.07

a

3

Slope

Limestone

3.92

0.187

21

3.41

4.43

a

# ANOVA table with an effect-size column (see the eta-squared caveat: the
# value is SS_effect/SS_total, i.e. classical eta squared)
(mod_Shannon %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_Shannon_ANOVA)

Df

Sum Sq

Mean Sq

F value

Pr(>F)

Part Eta Sq

1

1.069

1.069

5.090

0.035

0.184

1

0.176

0.176

0.839

0.370

0.030

1

0.156

0.156

0.744

0.398

0.027

21

4.411

0.210

NA

NA

0.759

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
# Interaction plot of the estimated marginal means
emmip(mod_Shannon, Location ~ Rock.type)

# ACE
# Same two-way ANOVA, now on the ACE richness estimate
(mod_ACE <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "ACE")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares         3      1924               8494     70754
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 58
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df F value Pr(>F)    
## (Intercept)        4356003  1 1292.88 <2e-16 ***
## Location                12  1    0.00   0.95    
## Rock.type             1634  1    0.48   0.49    
## Location:Rock.type    8494  1    2.52   0.13    
## Residuals            70754 21                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##     
## 417 
## 
##  Location 
##     Slope City
##       417  418
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       409       426
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 392   446      
##    rep     7     6      
##    City  428   407      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares         3      1924               8494     70754
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 58
## Estimated effects may be unbalanced
# Post-hoc test
# Estimated marginal means for each Location x Rock.type cell
marginal <- emmeans(mod_ACE,
                   ~ Location : Rock.type)
summary(marginal)

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

Slope

Chalk

392

21.9

21

347

438

City

Chalk

428

23.7

21

379

477

Slope

Limestone

446

23.7

21

396

495

City

Limestone

407

23.7

21

358

457

# Tukey-adjusted pairwise contrasts between the four group means
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            -35.5 32.3 21 -1.101  0.6930 
##  Slope Chalk - Slope Limestone       -53.2 32.3 21 -1.646  0.3760 
##  Slope Chalk - City Limestone        -14.8 32.3 21 -0.458  0.9670 
##  City Chalk - Slope Limestone        -17.6 33.5 21 -0.525  0.9520 
##  City Chalk - City Limestone          20.7 33.5 21  0.619  0.9250 
##  Slope Limestone - City Limestone     38.4 33.5 21  1.144  0.6670 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Compact letter display: groups sharing a letter are indistinguishable
(ACE_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

.group

1

Slope

Chalk

392

21.9

21

333

452

a

4

City

Limestone

407

23.7

21

343

472

a

2

City

Chalk

428

23.7

21

363

493

a

3

Slope

Limestone

446

23.7

21

381

510

a

# ANOVA table with an effect-size column (see the eta-squared caveat: the
# value is SS_effect/SS_total, i.e. classical eta squared)
(mod_ACE %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_ACE_ANOVA)

Df

Sum Sq

Mean Sq

F value

Pr(>F)

Part Eta Sq

1

2.54

2.54

0.001

0.978

0.000

1

1923.90

1923.90

0.571

0.458

0.024

1

8494.42

8494.42

2.521

0.127

0.105

21

70753.94

3369.24

NA

NA

0.872

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

#Inv. Simpson
# Two-way ANOVA (Location * Rock.type) on inverse-Simpson diversity;
# TestAlphaV3() is a project helper (defined earlier in this file) that
# fits aov() and prints type-III SS for the unbalanced design.
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares       470       125                 99      1025
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 6.99
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df F value Pr(>F)    
## (Intercept)          7498  1  153.60  4e-11 ***
## Location              504  1   10.33 0.0042 ** 
## Rock.type             117  1    2.40 0.1364    
## Location:Rock.type     99  1    2.03 0.1686    
## Residuals            1025 21                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##      
## 17.4 
## 
##  Location 
##     Slope City
##      21.5 12.9
## rep  13.0 12.0
## 
##  Rock.type 
##     Chalk Limestone
##      15.2      19.7
## rep  13.0      12.0
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 17.7  26.0     
##    rep    7.0   6.0     
##    City  12.7  13.0     
##    rep    6.0   6.0

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares       470       125                 99      1025
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 6.99
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_InvSim,
                   ~ Location : Rock.type)
summary(marginal)

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

Slope

Chalk

17.7

2.64

21

12.20

23.2

City

Chalk

12.7

2.85

21

6.75

18.6

Slope

Limestone

26.0

2.85

21

20.09

32.0

City

Limestone

13.0

2.85

21

7.09

19.0

# All pairwise comparisons of the Location x Rock.type marginal means,
# with Tukey multiplicity adjustment
contrast(
  marginal,
  method = "pairwise",
  adjust = "tukey"
)
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk             5.01 3.89 21  1.290  0.5800 
##  Slope Chalk - Slope Limestone       -8.33 3.89 21 -2.140  0.1720 
##  Slope Chalk - City Limestone         4.67 3.89 21  1.200  0.6330 
##  City Chalk - Slope Limestone       -13.34 4.03 21 -3.310  0.0160 
##  City Chalk - City Limestone         -0.34 4.03 21 -0.090  1.0000 
##  Slope Limestone - City Limestone    13.00 4.03 21  3.220  0.0200 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Compact letter display for the inverse-Simpson marginal means
# (Tukey-adjusted, alpha = 0.05); outer () prints the assigned result.
(InvSim_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

.group

2

City

Chalk

12.7

2.85

21

4.92

20.4

a

4

City

Limestone

13.0

2.85

21

5.26

20.8

a

1

Slope

Chalk

17.7

2.64

21

10.50

24.9

ab

3

Slope

Limestone

26.0

2.85

21

18.26

33.8

b

# ANOVA table for the inverse-Simpson model, augmented with
# partial eta-squared (term SS / total SS)
mod_InvSim_ANOVA <- mod_InvSim %>%
  anova() %>%
  mutate(`Part Eta Sq` = `Sum Sq` / sum(`Sum Sq`))
mod_InvSim_ANOVA

Df

Sum Sq

Mean Sq

F value

Pr(>F)

Part Eta Sq

1

470.4

470.4

9.64

0.005

0.273

1

125.3

125.3

2.57

0.124

0.073

1

99.3

99.3

2.03

0.169

0.058

21

1025.0

48.8

NA

NA

0.596

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
# Interaction plot of the estimated marginal means (Location lines over Rock.type)
emmip(mod_InvSim, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design


#Berger Parker
# Two-way ANOVA (Location * Rock.type) on Berger-Parker dominance,
# using the same TestAlphaV3() project helper as the other metrics.
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares    0.0643    0.0133             0.0000    0.1080
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.0717
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df F value  Pr(>F)    
## (Intercept)         0.924  1  179.69 9.2e-12 ***
## Location            0.066  1   12.92  0.0017 ** 
## Rock.type           0.013  1    2.59  0.1228    
## Location:Rock.type  0.000  1    0.00  0.9939    
## Residuals           0.108 21                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##       
## 0.192 
## 
##  Location 
##      Slope   City
##      0.143  0.244
## rep 13.000 12.000
## 
##  Rock.type 
##      Chalk Limestone
##      0.214     0.168
## rep 13.000    12.000
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 0.16  0.12     
##    rep   7.00  6.00     
##    City  0.27  0.22     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares    0.0643    0.0133             0.0000    0.1080
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.0717
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_BP,
                   ~ Location : Rock.type)
summary(marginal)

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

Slope

Chalk

0.164

0.027

21

0.108

0.220

City

Chalk

0.268

0.029

21

0.207

0.328

Slope

Limestone

0.118

0.029

21

0.057

0.179

City

Limestone

0.221

0.029

21

0.160

0.282

# All pairwise comparisons of the Location x Rock.type marginal means,
# with Tukey multiplicity adjustment
contrast(
  marginal,
  method = "pairwise",
  adjust = "tukey"
)
##  contrast                         estimate     SE df t.ratio p.value
##  Slope Chalk - City Chalk          -0.1035 0.0399 21 -2.600  0.0740 
##  Slope Chalk - Slope Limestone      0.0460 0.0399 21  1.150  0.6620 
##  Slope Chalk - City Limestone      -0.0571 0.0399 21 -1.430  0.4950 
##  City Chalk - Slope Limestone       0.1495 0.0414 21  3.610  0.0080 
##  City Chalk - City Limestone        0.0464 0.0414 21  1.120  0.6800 
##  Slope Limestone - City Limestone  -0.1031 0.0414 21 -2.490  0.0910 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Compact letter display for the Berger-Parker marginal means
# (Tukey-adjusted, alpha = 0.05); outer () prints the assigned result.
(BP_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

Location

Rock.type

emmean

SE

df

lower.CL

upper.CL

.group

3

Slope

Limestone

0.118

0.029

21

0.038

0.198

a

1

Slope

Chalk

0.164

0.027

21

0.090

0.238

ab

4

City

Limestone

0.221

0.029

21

0.141

0.301

ab

2

City

Chalk

0.268

0.029

21

0.188

0.347

b

# ANOVA table for the Berger-Parker model, augmented with
# partial eta-squared (term SS / total SS)
mod_BP_ANOVA <- mod_BP %>%
  anova() %>%
  mutate(`Part Eta Sq` = `Sum Sq` / sum(`Sum Sq`))
mod_BP_ANOVA

Df

Sum Sq

Mean Sq

F value

Pr(>F)

Part Eta Sq

1

0.064

0.064

12.52

0.002

0.347

1

0.013

0.013

2.59

0.123

0.072

1

0.000

0.000

0.00

0.994

0.000

21

0.108

0.005

NA

NA

0.582

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
# Interaction plot of the estimated marginal means (Location lines over Rock.type)
emmip(mod_BP, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

Plot all alpha diversity metrics together

# Prepare alpha-diversity data for plotting: drop the estimator metrics
# (Chao1, ACE), rename "S obs." to "Observed S", and fix the facet order.
# Uses mutate() + fct_recode()/fct_relevel() instead of the superseded
# mutate_at() scoped verb; the original's identity recodings
# ("Inv. Simpson" -> "Inv. Simpson", "Berger Parker" -> "Berger Parker")
# were no-ops and are dropped.
Richness_Diversity_long2plot <- Richness_Diversity_long %>%
  dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>%
  mutate(
    Metric = fct_recode(Metric, "Observed S" = "S obs."),
    Metric = fct_relevel(Metric, "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")
  ) %>%
  droplevels()

# Violin + jitter plot of the retained alpha-diversity metrics,
# faceted by metric (rows) and rock type (columns).
p_alpha <- ggplot() +
  # grey violins show the per-location distribution; the ymin/ymax
  # aesthetics are only used by the commented-out error bars below
  geom_violin(data = Richness_Diversity_long2plot,
             aes(
               x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr
             ), colour = "grey",
              fill = "grey",
              alpha = 1 / 3) +
  # individual samples as jittered points; geom_jitter2 is not base
  # ggplot2 -- NOTE(review): confirm which attached package provides it
  geom_jitter2(data = Richness_Diversity_long2plot,
               aes(x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr,
               colour = Location), size = 3, width = 0.2, alpha = 2/3) +
  scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
  # geom_errorbar(alpha = 1 / 2, width = 0.3) +
  xlab("") +
  ylab("") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 1
  ),
  legend.position="none") +
  # free scales: the four metrics live on very different ranges
  facet_grid(Metric ~ Rock.type, scale = "free") +
  background_grid(major = "y",
                  minor = "none",
                  size.major = 0.8) 

# Annotation table: one row per Location x Rock.type group per metric,
# carrying the compact-letter-display group labels computed above.
# Assumes each *_pairwise object has exactly 4 rows, in an order consistent
# with the facet levels -- NOTE(review): cld() sorts by emmean, so rows 1:4
# are not in a fixed factor order; verify labels land on the right groups.
dat_text <- data.frame(
  label = str_remove_all(c(obsS_pairwise$.group[1:4], 
                           Shannon_pairwise$.group[1:4], 
                           InvSim_pairwise$.group[1:4],
                           BP_pairwise$.group[1:4]), 
                         pattern = " "),
  Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
  Rock.type = fct_c(obsS_pairwise$Rock.type[1:4], 
                           Shannon_pairwise$Rock.type[1:4], 
                           InvSim_pairwise$Rock.type[1:4],
                           BP_pairwise$Rock.type[1:4]), 
  x = fct_c(obsS_pairwise$Location[1:4], 
                           Shannon_pairwise$Location[1:4], 
                           InvSim_pairwise$Location[1:4],
                           BP_pairwise$Location[1:4]),
  # x     = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
  # fixed y position per metric facet (one value per metric, repeated 4x)
  y = rep(c(520, 45, 5.2, 0.52), each = 4)
  # y = rep(c(40, 140, 0.5), each = 6)
)
# Overlay the significance letters onto the alpha-diversity plot
# (nudge of 0 in both directions keeps labels at their given coordinates)
p_alpha <- p_alpha + geom_text(
  data = dat_text,
  mapping = aes(x = x, y = y, label = label),
  nudge_x = 0,
  nudge_y = 0
)
print(p_alpha)

Beta diversity

Calculate and plot beta diversity metrics.

# PERMANOVA on Morisita-Horn dissimilarities: full Location * Rock.type model.
# NOTE(review): vegan::adonis() is deprecated in vegan >= 2.6 in favour of
# adonis2(); consider migrating when updating dependencies.
(mod1 <-  adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1      0.63   0.628    2.11 0.081  0.023 *
## Rock.type           1      0.39   0.386    1.30 0.050  0.245  
## Location:Rock.type  1      0.49   0.485    1.63 0.063  0.098 .
## Residuals          21      6.25   0.297         0.806         
## Total              24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PERMANOVA on Morisita-Horn dissimilarities: Location-only model
(mod2 <- adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location   1      0.63   0.628    2.03 0.081  0.029 *
## Residuals 23      7.12   0.309         0.919         
## Total     24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise PERMANOVA between locations (BH-adjusted p-values);
# PairwiseAdonis() is a project helper defined earlier in the file.
mod1_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod1_pairwise)
##          pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 City - Slope       24    2.03 0.0811   0.034      0.034   .
# Keep only pairs significant after BH adjustment (alpha = 0.05)
(sig_pairs1 <- list(as.character(mod1_pairwise$pairs[mod1_pairwise$p.adjusted < 0.05])))
## [[1]]
## [1] "City - Slope"
# Pairwise PERMANOVA between rock types (BH-adjusted p-values)
mod2_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Rock.type,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod2_pairwise)
##               pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 Chalk - Limestone       24    1.21 0.0501    0.32       0.32
# Keep only pairs significant after BH adjustment (alpha = 0.05)
(sig_pairs2 <- list(as.character(mod2_pairwise$pairs[mod2_pairwise$p.adjusted < 0.05])))
## [[1]]
## character(0)
# Pairwise PERMANOVA between all Location:Rock.type combinations
mod3_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location.rock,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod3_pairwise)
##                              pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1      City:Chalk - City:Limestone       11    1.20 0.1069   0.344      0.413    
## 2     City:Chalk - Slope:Limestone       11    1.43 0.1251   0.180      0.270    
## 3         City:Chalk - Slope:Chalk       12    2.92 0.2099   0.017      0.102    
## 4 City:Limestone - Slope:Limestone       11    1.07 0.0964   0.429      0.429    
## 5     City:Limestone - Slope:Chalk       12    1.98 0.1528   0.091      0.182    
## 6    Slope:Limestone - Slope:Chalk       12    1.90 0.1471   0.080      0.182
# Note the looser alpha (0.1) here, unlike the 0.05 used for sig_pairs1/2
(sig_pairs3 <- list(as.character(mod3_pairwise$pairs[mod3_pairwise$p.adjusted < 0.1])))
## [[1]]
## character(0)
# Weighted, normalised UniFrac distance matrix from the (non-GMPR) filtered
# phyloseq object; requires the phylogenetic tree stored in Ps_obj_filt.
Unifrac_mat <- UniFrac(Ps_obj_filt, 
                       weighted = TRUE, 
                       normalized = TRUE, 
                       parallel = TRUE, 
                       fast = TRUE)

# PERMANOVA on the UniFrac distances: full Location * Rock.type model.
# NOTE(review): `method = "horn"` is ignored when a dist object is supplied
# (adonis only uses `method` to build a distance matrix from raw data).
(mod1 <-  adonis(
  Unifrac_mat ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1     0.047  0.0469    1.37 0.053  0.214  
## Rock.type           1     0.053  0.0529    1.54 0.060  0.147  
## Location:Rock.type  1     0.063  0.0632    1.84 0.072  0.068 .
## Residuals          21     0.719  0.0342         0.815         
## Total              24     0.882                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PERMANOVA on the UniFrac distances: Location-only model.
# NOTE(review): `method = "horn"` is ignored for a dist input (see above).
(mod2 <- adonis(
  Unifrac_mat ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)
## Location   1     0.047  0.0469    1.29 0.053   0.26
## Residuals 23     0.835  0.0363         0.947       
## Total     24     0.882                 1.000

The difference between city and slope is significant based on the Morisita-Horn distances between samples but not based on UniFrac.

Calculate ordinations
# Constrained ordination (CAP / db-RDA) on Morisita-Horn dissimilarities;
# ord1 (full model) is computed but only ord2 (Location) is plotted below.
Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location)

# Percent variance explained per axis, rounded to 1 decimal for labels
explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

# Extract the sample scores as a data frame for custom ggplot-ing
Ps_obj_filt_GMPR %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

# CAP ordination plot: samples coloured by Location, shaped by Rock.type,
# with 90% t-distribution ellipses per location.
# Bug fix: the y aesthetic is MDS1 (the first *unconstrained* axis -- with a
# single 2-level constraint only CAP1 exists), but the y label previously
# read "CAP2". Label now matches the plotted axis.
p_ord <- ggplot(ord_df,
             aes(
               x = CAP1,
               y = MDS1,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = CAP1, 
        y = MDS1, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point(size = 4, alpha = 2 / 3) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("CAP1 (%s%%)", explained[1]), 
       y = sprintf("MDS1 (%s%%)", explained[2])) +
  # aspect ratio proportional to the axes' explained variance
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
   theme(legend.justification = "top")
  # facet_wrap(. ~ Rock.type)
print(p_ord)

# Unconstrained PCoA on weighted UniFrac distances (the formula argument is
# ignored by PCoA -- NOTE(review): it is likely a leftover from the CAP call)
Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)

# Percent variance explained per axis, rounded to 1 decimal for labels
explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

# Extract the sample scores as a data frame for custom ggplot-ing
Ps_obj_filt %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

# PCoA (weighted UniFrac) plot: samples coloured by Location, shaped by
# Rock.type, with 90% t-distribution ellipses per location.
# Bug fix: the plotted aesthetics are the PCoA axes (Axis.1 / Axis.2), but
# the axis labels previously read "CAP1"/"CAP2" -- a copy-paste from the
# CAP plot above. Labels now say PCo1/PCo2 to match the ordination method.
p_ord_phylo <- ggplot(ord_df,
             aes(
               x = Axis.1,
               y = Axis.2,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = Axis.1, 
        y = Axis.2, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point(size = 4, alpha = 2 / 3) +
  theme_bw(base_size = 14) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("PCo1 (%s%%)", explained[1]), 
       y = sprintf("PCo2 (%s%%)", explained[2])) +
  # aspect ratio proportional to the axes' explained variance
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+ 
  # facet_wrap(. ~ Rock.type)
print(p_ord_phylo)

Test differences between samples on the phylum level

STAMPR analysis of the differences of each phylum between locations using Aligned Rank Transformed ANOVA test and a post-hoc estimated marginal means.

# Phylum-level differential tests between locations (and location x rock)
# using the project helpers STAMPR2()/plotSTAMPR() defined earlier in the
# file; taxa below the 0.001 relative-abundance threshold are excluded.
Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.001, outputfile = paste0(Proj_name, "_Location"))

pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City")
print(pSTAMPR1)

Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.001, outputfile = paste0(Proj_name, "_Location_Rock"))

pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk")
print(pSTAMPR2)

Taxonomic distribution analysis

Agglomerate data and tag rare taxa

# Convert counts to per-sample relative abundance (%), then agglomerate
# OTUs to the phylum level and melt to a long data frame for plotting.
Ps_obj_filt_GMPR_ra <- transform_sample_counts(Ps_obj_filt_GMPR, function(x){x / sum(x)} * 100)

Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR_ra, 
                             "Phylum", 
                             NArm = TRUE)
Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))

# group dataframe by Phylum, calculate median rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Phylum) %>%
  summarise(median = median(Abundance)) ->
  medians

# find Phyla whose rel. abund. is less than 1%
# (note: abundances are in percent here, so 0.01 means 0.01%)
Rare_phyla0.01 <- medians[medians$median <= 0.01, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.01, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
  summarise(Abundance = sum(Abundance)) ->
  Ps_obj_filt_GMPR_glom_DF_2plot

# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)

# Total "Rare" abundance per sample.
# NOTE(review): samples with no "Rare" phylum rows would be dropped by this
# filter, which would misalign the cbind() with sample_data further below.
Ps_obj_filt_GMPR_glom_DF_2plot %>% 
  group_by(Sample) %>% 
  filter(Phylum == "Rare") %>% 
  summarise(`Rares (%)` = sum(Abundance)) -> 
  Rares

Summarise taxonomy

# Percentage of reads classified as rare 
# Render the per-sample "Rare" percentages as a styled HTML table
Rares %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Percentage of reads per sample classified as rare:

Sample

Rares (%)

ShivSClk1S12

0.01

ShivSClk2S15

0.59

ShivSClk3S16

0.02

ShivSClk4S17

0.01

ShivSClk5S18

0.01

ShivSLimek1S13

0.02

ShivSLimek2S14

0.04

ShivSLimek3S19

14.27

ShivSLimek4S20

11.48

ShivSLimek5S21

0.01

ShivSLimek6S25

0.01

SlpClk1S3

0.24

SlpClk2S4

0.04

SlpClk3S5

0.04

SlpClk4S6

0.03

SlpClk5S7

0.01

SlpClk6S8

0.03

SlpClk7S9

0.01

SlpClk8S10

0.03

SlpClk9S11

0.05

SlpLime1S1

0.01

SlpLime2S2

0.04

SlpLime3S22

0.09

SlpLime4S23

0.01

SlpLime5S24

0.02

# Reorder Rares to match the sample order of the phyloseq sample_data so
# that the positional cbind() below lines rows up correctly.
# NOTE(review): this relies on Rares having exactly one row per sample;
# a join by sample name would be safer than arrange + cbind.
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)

# Mean "Rare" percentage per Location x Rock.type group
Rares %>% 
  cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>% 
  group_by(Location.rock) %>% 
  setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
  summarise(`Rares (%)` = mean(`Rares....`)) -> 
  Rares_merged

# Percentage of reads classified as rare 
# Render the group-level means as a styled HTML table.
# Bug fix: the caption previously said "per sample" (copy-pasted from the
# per-sample table above), but this table is aggregated per
# Location x Rock.type group.
Rares_merged %>%
  kable(., digits = c(2), caption = "Mean percentage of reads per location-rock group classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Percentage of reads per sample classified as rare:

Location.rock

Rares (%)

City:Chalk

0.11

City:Limestone

4.31

Slope:Chalk

0.03

Slope:Limestone

0.07

Plot taxonomy box-plot

# Rank phyla by total abundance across all samples, then order the Phylum
# factor by that ranking with "Rare" forced to the end.
Taxa_rank <- Ps_obj_filt_GMPR_glom_DF_2plot %>%
  group_by(Phylum) %>%
  summarise(sum.Taxa = sum(Abundance)) %>%
  arrange(desc(sum.Taxa))
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum <- Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %>%
  factor(levels = Taxa_rank$Phylum) %>%
  fct_relevel("Rare", after = Inf)
  
# Box plot of phylum-level relative abundance, split by Location (facets);
# points coloured by rock type are overlaid on each box.
p_taxa_box <-
  ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
  geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
  geom_point(
    aes(colour = Rock.type),
    position = position_jitterdodge(dodge.width = 1),
    alpha = 1 / 2,
    stroke = 0,
    size = 2
  ) +
  scale_colour_manual(values = Gradient.colours, name = "") +
  theme_bw()+
  # theme_cowplot(font_size = 11, font_family = f_name) +
  labs(x = NULL, y = "Relative abundance (%)") +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  facet_grid(Location ~ .) +
  background_grid(major = "xy",
                  minor = "none") +
  # rotate x labels and pin the legend inside the top-right corner
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 0.9
  ),
  legend.position = c(.99, .99),
  legend.justification = c("right", "top"),
  legend.box.just = "top",
  legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)

Differential abundance models

Tag rare phyla (for plotting purposes only)

# Re-agglomerate the (count-scale) GMPR object at phylum level and tag
# rare phyla; this ranking is reused by the corncob plots below.
Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR, 
                             "Phylum", 
                             NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char

# group dataframe by Phylum, calculate median rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(median = median(Abundance)) ->
  medians

# find Phyla whose median rel. abund. is less than 0.5%
# (abundances here are fractions, so the 0.005 threshold = 0.5%)
Rare_phyla0.005 <- medians[medians$median <= 0.005, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.005, ]$Phylum <- 'Rare'

# re-group: total abundance per phylum, sorted for plot ordering
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(Abundance = sum(Abundance)) %>% 
  arrange(desc(Abundance)) -> Taxa_rank

Detect differentially abundant OTUs using corncob (Martin, Witten, and Willis 2020)

comparison_string <- c("City", "Slope")

# Subset to the two compared locations and agglomerate at Order level.
# NOTE(review): Ps_obj_filt_pairwise_glom does not appear to be used by the
# differentialTest() calls below (they operate on Ps_obj_filt directly);
# confirm whether this object is a leftover or used elsewhere.
Ps_obj_filt %>%
  subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
  tax_glom("Order") ->
  Ps_obj_filt_pairwise_glom

# Test differential abundance for location
# corncob differential-abundance test for Location: the abundance model
# drops Location under the null while the dispersion model keeps it, so
# significance reflects abundance differences only.
da_Loc <- differentialTest(formula = ~ Location,
                           phi.formula = ~ Location,
                           formula_null = ~ 1,
                           phi.formula_null = ~ Location, 
                           test = "Wald", boot = FALSE,
                           data = Ps_obj_filt,
                           fdr_cutoff = 0.05,
                           full_output = TRUE)
# Confidence-interval data for the significant taxa (used for ymin/ymax below)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
# OTUs for which the model failed to converge (p is NA)
which(is.na(da_Loc$p)) %>% names
## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU451" "OTU245"
# Mean relative abundance (%) of every OTU across all samples,
# returned as a two-column tibble keyed by OTU name
baseMean <- Ps_obj_filt %>%
  transform_sample_counts(function(x) x / sum(x) * 100) %>%
  taxa_sums() %>%
  map_dbl(~ .x / nsamples(Ps_obj_filt)) %>%
  enframe(name = "OTU", value = "Mean abundance (%)")

# Assemble a per-OTU data frame of corncob estimates, taxonomy, significance
# flag and mean abundance.
# NOTE(review): element 15 / sub-element 2 of each model object is assumed to
# be the mu.LocationSlope coefficient -- confirm against the installed
# corncob version, as positional extraction is fragile.
map(da_Loc$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_df

# Fill in confidence-interval bounds for the significant taxa only
da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
# Collapse rare phyla for plotting consistency with the taxonomy plots
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is
# GGPlotCorncob() is a project plotting helper defined earlier in the file
p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

# Summary label: counts of significantly increased / decreased OTUs (total in parentheses)
n_up <- sum(da_Loc_df$`Differential abundance` > 0 & da_Loc_df$Significance == "Pass")
n_down <- sum(da_Loc_df$`Differential abundance` < 0 & da_Loc_df$Significance == "Pass")
corncob_summary <- tibble(Label = paste0("⬆", n_up, " ⬇", n_down, " (", nrow(da_Loc_df), ")"))

# Annotate the corncob plot with the up/down summary (top-right corner),
# scale point size by mean abundance, and clamp the y range.
p_corncob_loc <- p_corncob_loc +
  geom_text(
    data    = corncob_summary,
    mapping = aes(x = Inf, y = Inf, label = Label),
    hjust   = 1.1,
    vjust   = 1.6
  ) +
  scale_size_continuous(name = "Mean abundance (%)",
                        limits = c(min(da_Loc_df$`Mean abundance (%)`), max(da_Loc_df$`Mean abundance (%)`))
  ) + 
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-15, 15))
print(p_corncob_loc)

Modelling differential abundance and variance between locations discovered `length(da_Loc$significant_taxa)` significant taxa (inline code unevaluated in this rendering).

comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
# (same model structure as da_Loc, with Rock.type as the tested factor)
da_Rock <- differentialTest(formula = ~ Rock.type,
                                 phi.formula = ~ Rock.type,
                                 formula_null = ~ 1,
                                 phi.formula_null = ~ Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
# Confidence-interval data for the significant taxa
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
# OTUs for which the model failed to converge (p is NA)
which(is.na(da_Rock$p)) %>% names
##  [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU451" "OTU114" "OTU309" "OTU502" "OTU245" "OTU229"
# Assemble the per-OTU results table for the rock-type comparison
# (same positional coefficient extraction as for da_Loc -- see note there)
map(da_Rock$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Rock_df

# Fill in confidence-interval bounds for the significant taxa only
da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
# Collapse rare phyla for plotting consistency
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

# Summary label: significantly increased / decreased OTU counts (total in parentheses)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Rock_df$`Differential abundance` > 0 &  da_Rock_df$Significance == "Pass"), " ⬇", sum(da_Rock_df$`Differential abundance` < 0 &  da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))

# Annotate the rock-type corncob plot with the summary label, size scale
# by mean abundance, and clamp the y range (same layout as the Location plot)
p_corncob_rock <- p_corncob_rock +
  geom_text(
    data    = corncob_summary,
    mapping = aes(x = Inf, y = Inf, label = Label),
    hjust   = 1.1,
    vjust   = 1.6
  ) +
  scale_size_continuous(name = "Mean abundance (%)",
                        limits = c(min(da_Rock_df$`Mean abundance (%)`), max(da_Rock_df$`Mean abundance (%)`))
  ) + 
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-15, 15))
print(p_corncob_rock)

Modelling differential abundance and variance between rock types discovered `length(da_Rock$significant_taxa)` significant taxa (inline code unevaluated in this rendering).

# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
# Differential abundance for Location while controlling for Rock.type:
# the null abundance model keeps Rock.type, so only the Location effect
# is tested; dispersion is modelled identically under both hypotheses.
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
                                 phi.formula = ~ Location + Rock.type,
                                 formula_null = ~ Rock.type,
                                 phi.formula_null = ~ Location + Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
# Confidence-interval data for the significant taxa
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)

# OTUs for which the model failed to converge (p is NA)
which(is.na(da_Loc_exRock$p)) %>% names
##  [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU451" "OTU114" "OTU309"
## [19] "OTU502" "OTU245" "OTU229"
# Assemble the per-OTU results table for the rock-controlled comparison
# (same positional coefficient extraction as for da_Loc -- see note there)
map(da_Loc_exRock$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_exRock_df

# Fill in confidence-interval bounds for the significant taxa only
da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
# Collapse rare phyla for plotting consistency
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.005, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

# Summary label: significantly increased / decreased OTU counts (total in parentheses)
corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_exRock_df$`Differential abundance` > 0 &  da_Loc_exRock_df$Significance == "Pass"), " ⬇", sum(da_Loc_exRock_df$`Differential abundance` < 0 &  da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))

p_corncob_locExroc <- p_corncob_locExroc +
  geom_text(
    data    = corncob_summary,
    mapping = aes(x = Inf, y = Inf, label = Label),
    hjust   = 1.1,
    vjust   = 1.6
  ) +
  scale_size_continuous(name = "Mean abundance (%)",
                        limits = c(min(da_Loc_exRock_df$`Mean abundance (%)`), max(da_Loc_exRock_df$`Mean abundance (%)`))
  ) + 
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-15, 15))
print(p_corncob_locExroc)

Modelling differential abundance between locations, while controlling for rock type, detected `length(da_Loc_exRock$significant_taxa)` significantly differentially abundant OTUs.

# Fit beta-binomial models (corncob::bbdml) for OTU260:
# null (intercept-only), Location, and Location x Rock.type interaction
mod260 <- bbdml(formula = OTU260 ~ 1,
             phi.formula = ~ 1,
             data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
             phi.formula = ~ Location,
             data = Ps_obj_filt)
# FIX: this model previously fitted OTU97, contradicting its name and the two
# models above; corrected to OTU260 so the nested-model comparison is valid
mod260_Loc_rock <- bbdml(formula = OTU260 ~ Location*Rock.type,
             phi.formula = ~ Location*Rock.type,
             data = Ps_obj_filt)
# Likelihood-ratio test: does Location improve on the intercept-only model?
lrtest(mod_null = mod260, mod = mod260_Loc)
## [1] 0.0029
# lrtest(mod_null = mod260_Loc, mod = mod260_Loc_rock)
summary(mod260_Loc)
## 
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
## 
## 
## Coefficients associated with abundance:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.562      0.239  -35.78   <2e-16 ***
## Location1      0.676      0.239    2.82     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Coefficients associated with dispersion:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.535      0.409  -20.89  1.6e-15 ***
## Location1      1.427      0.409    3.49   0.0022 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Log-likelihood: -80.58
plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)

Compose figures

# Assemble the main microbiome figure with patchwork:
# row 1 = alpha diversity + taxa boxplot (1:2 width), row 2 = ordination + STAMPR
composite_plot <- ((p_alpha + p_taxa_box +  plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))

plot_file <- "./Results/Microbiome_1"
svg_path <- paste0(plot_file, ".svg")
png_path <- paste0(plot_file, ".png")

# Vector version (SVG), then compressed copy (SVGZ)
svglite(svg_path,
        width = 14,
        height = 10.5)
print(composite_plot)
dev.off()
## pdf 
##   2
# Raster version (PNG via ragg); `res` and `scaling` are forwarded to agg_png
ggsave(
  png_path,
  composite_plot,
  device = agg_png,
  width = 14,
  height = 10.5,
  units = "cm",
  res = 900,
  scaling = 0.38
)
gz(svg_path, paste0(plot_file, ".svgz"))
knitr::include_graphics(png_path)
# Export the corncob differential-abundance figure (SVG + SVGZ + PNG)
plot_file <- "./Results/Microbiome_2"
svg_path <- paste0(plot_file, ".svg")
png_path <- paste0(plot_file, ".png")

svglite(svg_path,
        width = 12,
        height = 10)
print(p_corncob_locExroc)
dev.off()
## pdf 
##   2
# Raster version (PNG via ragg); `res` and `scaling` are forwarded to agg_png
ggsave(
  png_path,
  p_corncob_locExroc,
  device = agg_png,
  width = 12,
  height = 10,
  units = "cm",
  res = 900,
  scaling = 0.38
)
gz(svg_path, paste0(plot_file, ".svgz"))

References

Chen, Jun, and Li Chen. 2017. “GMPR: A novel normalization method for microbiome sequencing data.” bioRxiv, February, 112565. https://doi.org/10.1101/112565.

Martin, Bryan D., Daniela Witten, and Amy D. Willis. 2020. “Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression.” Annals of Applied Statistics 14 (1): 94–115. https://doi.org/10.1214/19-AOAS1283.